library(bupaR)
## Loading required package: edeaR
## Loading required package: eventdataR
## Loading required package: processmapR
## Loading required package: xesreadR
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'xesreadR'
## Loading required package: processmonitR
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'processmonitR'
## Loading required package: petrinetR
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'petrinetR'
## 
## Attaching package: 'bupaR'
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:utils':
## 
##     timestamp
library(edeaR)
library(processmapR)
library(eventdataR)
library(readr)
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.1     v purrr   0.3.2
## v tibble  2.1.1     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v ggplot2 3.1.1     v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks bupaR::filter(), stats::filter()
## x dplyr::lag()    masks stats::lag()
library(DiagrammeR)
library(ggplot2)
library(stringr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
# Load the raw credit-application log; the first CSV row holds the column names.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change how the header is read.
data <- read.csv("C:/credit_file.csv", header = TRUE)
# Per-column overview: level counts, numeric quantiles and NA counts.
summary(data)
##                    Case_ID                       Activity     Resource 
##  Application_1746793196:28   O_Create Offer          :13   User_1 :34  
##  Application_428409768 :23   O_Created               :13   User_19:33  
##  Application_652823628 :19   O_Sent (mail and online):13   User_3 :17  
##  Application_1266995739:17   A_Validating            :11   User_17:11  
##  Application_619403287 :17   W_Validate application  :11   User_38: 7  
##  Application_1085880569:16   A_Accepted              :10   User_95: 7  
##  (Other)               :49   (Other)                 :98   (Other):60  
##                 Start_Timestamp               Complete_Timestamp
##  2016/01/01 10:51:15.304:  1    2016/01/01 10:51:15.304:  1     
##  2016/01/01 10:51:15.352:  1    2016/01/01 10:51:15.352:  1     
##  2016/01/01 10:52:36.413:  1    2016/01/01 10:52:36.413:  1     
##  2016/01/01 11:16:11.500:  1    2016/01/01 11:16:11.500:  1     
##  2016/01/01 11:16:11.549:  1    2016/01/01 11:16:11.549:  1     
##  2016/01/01 11:17:31.594:  1    2016/01/01 11:17:31.594:  1     
##  (Other)                :163    (Other)                :163     
##         Variant   Variant_index   X.case._ApplicationType
##  Variant 256:28   Min.   :  1.0   New credit:169         
##  Variant 209:23   1st Qu.:  6.0                          
##  Variant 1  :20   Median : 19.0                          
##  Variant 2  :19   Mean   :109.6                          
##  Variant 30 :17   3rd Qu.:209.0                          
##  Variant 6  :17   Max.   :388.0                          
##  (Other)    :45                                          
##                 X.case._creditGoal X.case._RequestedAmount  Accepted  
##  Car                     :53       Min.   : 5000           false:  4  
##  Existing credit takeover:52       1st Qu.: 7000           true :  9  
##  Home improvement        :64       Median :15000           NA's :156  
##                                    Mean   :13728                      
##                                    3rd Qu.:15000                      
##                                    Max.   :35000                      
##                                                                       
##          Action     CreditScore                       EventID   
##  Created    : 23   Min.   :   0.0   Application_1085880569:  1  
##  Deleted    : 12   1st Qu.:   0.0   Application_1266995739:  1  
##  Obtained   : 25   Median :   0.0   Application_1691306052:  1  
##  statechange:109   Mean   : 282.4   Application_1710223761:  1  
##                    3rd Qu.: 799.0   Application_1746793196:  1  
##                    Max.   :1059.0   Application_1878239836:  1  
##                    NA's   :156      (Other)               :163  
##       EventOrigin FirstWithdrawalAmount  MonthlyCost    NumberOfTerms   
##  Application:75   Min.   :    0         Min.   :141.4   Min.   : 33.00  
##  Offer      :57   1st Qu.:  400         1st Qu.:159.0   1st Qu.: 57.00  
##  Workflow   :37   Median : 3726         Median :200.0   Median : 84.00  
##                   Mean   : 8314         Mean   :233.1   Mean   : 83.54  
##                   3rd Qu.:15000         3rd Qu.:252.7   3rd Qu.:120.00  
##                   Max.   :35000         Max.   :498.3   Max.   :120.00  
##                   NA's   :156           NA's   :156     NA's   :156     
##              OfferID    OfferedAmount    Selected   lifecycle.transition
##  Offer_117620760 :  4   Min.   : 6000   false:  7   complete:144        
##  Offer_148581083 :  4   1st Qu.:13000   true :  6   start   : 25        
##  Offer_1534843733:  4   Median :15000   NA's :156                       
##  Offer_467024374 :  4   Mean   :15108                                   
##  Offer_774210695 :  4   3rd Qu.:15700                                   
##  (Other)         : 24   Max.   :35000                                   
##  NA's            :125   NA's   :156                                     
##               starttimestamp               endtimestamp
##  2016-01-02T11:03:05Z:  3    2016-01-02T11:03:05Z:  4  
##  2016-01-02T11:06:53Z:  3    2016-01-02T11:06:53Z:  4  
##  2016-01-02T11:30:28Z:  3    2016-01-02T11:43:56Z:  4  
##  2016-01-02T11:43:56Z:  3    2016-01-02T11:45:11Z:  4  
##  2016-01-02T11:45:11Z:  3    2016-01-02T14:44:54Z:  4  
##  2016-01-02T13:38:31Z:  3    2016-01-04T17:23:02Z:  4  
##  (Other)             :151    (Other)             :145

On peut voir les différentes modalités des variables du fichier log ‘credit file’. Par exemple, les modalités les plus fréquentes de la variable Activity sont O_Create Offer, O_Created, O_Sent (mail and online), A_Validating, W_Validate application et A_Accepted, entre autres.

Conversion des champs date:

Pour aboutir à un fichier event log, nous devons convertir les champs Start_Timestamp et Complete_Timestamp au format date-heure (Datetime) ; le résultat est stocké dans les colonnes starttimestamp et endtimestamp.

# Parse the raw timestamp strings (e.g. "2016/01/01 10:51:15.304") into POSIXct.
# The results overwrite the starttimestamp / endtimestamp columns read from the
# CSV; these are the columns later passed as `timestamps` to build the event log.
# %OS (instead of %S) keeps the fractional seconds, so events that happen within
# the same second keep their relative order instead of becoming ties.
# NOTE(review): no `tz` is given, so values are interpreted in the session's
# local time zone -- confirm this matches the system that produced the log.
data$starttimestamp <- as.POSIXct(
  data$Start_Timestamp,
  format = "%Y/%m/%d %H:%M:%OS"
)
data$endtimestamp <- as.POSIXct(
  data$Complete_Timestamp,
  format = "%Y/%m/%d %H:%M:%OS"
)

Event log:

Nous avons utilisé deux manières de créer le fichier event log.

Première méthode:

# Method 1: build the event log from at most the first 10000 rows of `data`.
# Each row is treated as one activity instance whose two timestamp columns
# (start and end) are handed over through `timestamps`.
events <- data %>%
  head(n = 10000) %>%
  bupaR::activities_to_eventlog(
    timestamps = c("starttimestamp", "endtimestamp"),
    resource_id = "Resource",
    activity_id = "Activity",
    case_id = "Case_ID"
  )

Deuxième méthode:

# Method 2: same conversion as above, applied to the whole data frame.
# This replaces the `events` object created by the first method.
events <- data %>%
  bupaR::activities_to_eventlog(
    timestamps = c("starttimestamp", "endtimestamp"),
    resource_id = "Resource",
    activity_id = "Activity",
    case_id = "Case_ID"
  )
# Overview of the resulting log: event/case/trace counts, covered time span
# and per-column statistics.
summary(events)
## Number of events:  338
## Number of cases:  10
## Number of traces:  10
## Number of distinct activities:  22
## Average trace length:  33.8
## 
## Start eventlog:  2016-01-01 10:51:15
## End eventlog:  2016-02-29 08:00:48
##    Case_ID                              Activity      Resource  
##  Length:338         O_Create Offer          : 26   User_1 : 68  
##  Class :character   O_Created               : 26   User_19: 66  
##  Mode  :character   O_Sent (mail and online): 26   User_3 : 34  
##                     A_Validating            : 22   User_17: 22  
##                     W_Validate application  : 22   User_38: 14  
##                     A_Accepted              : 20   User_95: 14  
##                     (Other)                 :196   (Other):120  
##                 Start_Timestamp               Complete_Timestamp
##  2016/01/01 10:51:15.304:  2    2016/01/01 10:51:15.304:  2     
##  2016/01/01 10:51:15.352:  2    2016/01/01 10:51:15.352:  2     
##  2016/01/01 10:52:36.413:  2    2016/01/01 10:52:36.413:  2     
##  2016/01/01 11:16:11.500:  2    2016/01/01 11:16:11.500:  2     
##  2016/01/01 11:16:11.549:  2    2016/01/01 11:16:11.549:  2     
##  2016/01/01 11:17:31.594:  2    2016/01/01 11:17:31.594:  2     
##  (Other)                :326    (Other)                :326     
##         Variant   Variant_index   X.case._ApplicationType
##  Variant 256:56   Min.   :  1.0   New credit:338         
##  Variant 209:46   1st Qu.:  6.0                          
##  Variant 1  :40   Median : 19.0                          
##  Variant 2  :38   Mean   :109.6                          
##  Variant 30 :34   3rd Qu.:209.0                          
##  Variant 6  :34   Max.   :388.0                          
##  (Other)    :90                                          
##                 X.case._creditGoal X.case._RequestedAmount  Accepted  
##  Car                     :106      Min.   : 5000           false:  8  
##  Existing credit takeover:104      1st Qu.: 7000           true : 18  
##  Home improvement        :128      Median :15000           NA's :312  
##                                    Mean   :13728                      
##                                    3rd Qu.:15000                      
##                                    Max.   :35000                      
##                                                                       
##          Action     CreditScore                       EventID   
##  Created    : 46   Min.   :   0.0   Application_1085880569:  2  
##  Deleted    : 24   1st Qu.:   0.0   Application_1266995739:  2  
##  Obtained   : 50   Median :   0.0   Application_1691306052:  2  
##  statechange:218   Mean   : 282.4   Application_1710223761:  2  
##                    3rd Qu.: 799.0   Application_1746793196:  2  
##                    Max.   :1059.0   Application_1878239836:  2  
##                    NA's   :312      (Other)               :326  
##       EventOrigin  FirstWithdrawalAmount  MonthlyCost    NumberOfTerms   
##  Application:150   Min.   :    0         Min.   :141.4   Min.   : 33.00  
##  Offer      :114   1st Qu.:  400         1st Qu.:159.0   1st Qu.: 57.00  
##  Workflow   : 74   Median : 3726         Median :200.0   Median : 84.00  
##                    Mean   : 8314         Mean   :233.1   Mean   : 83.54  
##                    3rd Qu.:15000         3rd Qu.:252.7   3rd Qu.:120.00  
##                    Max.   :35000         Max.   :498.3   Max.   :120.00  
##                    NA's   :312           NA's   :312     NA's   :312     
##              OfferID    OfferedAmount    Selected   lifecycle.transition
##  Offer_117620760 :  8   Min.   : 6000   false: 14   complete:288        
##  Offer_148581083 :  8   1st Qu.:13000   true : 12   start   : 50        
##  Offer_1534843733:  8   Median :15000   NA's :312                       
##  Offer_467024374 :  8   Mean   :15108                                   
##  Offer_774210695 :  8   3rd Qu.:15700                                   
##  (Other)         : 48   Max.   :35000                                   
##  NA's            :250   NA's   :312                                     
##  activity_instance_id         lifecycle_id   timestamp                  
##  Length:338           endtimestamp  :169   Min.   :2016-01-01 10:51:15  
##  Class :character     starttimestamp:169   1st Qu.:2016-01-02 12:05:17  
##  Mode  :character                          Median :2016-01-02 15:13:23  
##                                            Mean   :2016-01-08 20:37:05  
##                                            3rd Qu.:2016-01-12 11:00:09  
##                                            Max.   :2016-02-29 08:00:48  
##                                                                         
##      .order      
##  Min.   :  1.00  
##  1st Qu.: 85.25  
##  Median :169.50  
##  Mean   :169.50  
##  3rd Qu.:253.75  
##  Max.   :338.00  
## 

Fréquence des événements:

# Compute how often each activity occurs in the log and draw the package's
# default plot for that frequency table.
plot(activity_frequency(events, level = "activity"))

Nous pouvons voir que les e-mails envoyés (O_Sent) et ceux créés par une offre ou non ont la même fréquence, de l’ordre de 12.5, car dans n’importe quelle création de compte, tous les utilisateurs passent par les mêmes étapes, c’est-à-dire la phase de création de la demande, d’où sa fréquence élevée. Nous remarquons de plus que les fréquences d’acceptation, d’inscription et de formulaire complet sont les mêmes, alors que celles du refus (refused ou denied) sont moins élevées.

Traces des activités:

Les fonctions de tracé génériques peuvent être utilisées pour créer des graphiques prédéfinis, pouvant être personnalisés à l’aide de ggplot2.

# Visualise the traces (activity sequences) of the log, with the `coverage`
# setting at 0.9.
trace_explorer(events, coverage = 0.9)

# Filter the log by trace frequency (perc = 0.9), then draw the process map
# of what remains.
process_map(filter_trace_frequency(events, perc = 0.9))
## Warning: Prefixing `UQ()` with the rlang namespace is deprecated as of rlang 0.3.0.
## Please use the non-prefixed form or `!!` instead.
## 
##   # Bad:
##   rlang::expr(mean(rlang::UQ(var) * 100))
## 
##   # Ok:
##   rlang::expr(mean(UQ(var) * 100))
## 
##   # Good:
##   rlang::expr(mean(!!var * 100))
## 
## This warning is displayed once per session.
# Dotted chart of the event log with the function's default settings.
dotted_chart(events)
## Joining, by = "Case_ID"

Le graphique à points (dotted chart), personnalisable, montre tous les événements au fil du temps. Les relations de précédence entre les activités sont claires : au début, le 1er janvier, on peut voir l’inscription puis le formulaire complet (A_Submitted puis Complete application), ensuite les formulaires validés entre le 10 janvier et fin janvier, puis, s’il existe un refus (Denied), celui-ci apparaît après le 1er février.

Conclusion:

Nous avons pu analyser un fichier log lié au crédit bancaire des clients afin d’étudier les activités de ces derniers dans le processus de demande de crédit bancaire. Nous avons utilisé les données présentes, comme l’identifiant, le type d’activité, le début et la fin des activités, pour analyser les fréquences et les interpréter grâce aux outils de visualisation.